Package installation
Note: my local evalcast and covidcast installations may be slightly ahead of the remote versions due to outstanding PRs.
remotes::install_github("cmu-delphi/covidcast", ref="main",
subdir = "R-packages/covidcast")
remotes::install_github("cmu-delphi/covidcast", ref = "evalcast-killcards",
subdir = "R-packages/evalcast")
remotes::install_github(repo="ryantibs/quantgen", subdir="R-package/quantgen")
# remotes::install_github("cmu-delphi/covidcast", ref = "main",
# subdir = "R-packages/modeltools")
Current model is quantile regression with 3 lags of the response (deaths) and 3 lags of cases. This is at the state level. The mobility model has a 28 day lag from the forecast date (should probably be from the target date)
# Quantile autoregression with 3 lags, or QAR3
corrector <- zookeeper::make_aardvark_corrector()
mob <- list()
for (a in seq_along(ahead)){
ah <- ahead[a]
lags <- list(c(0, 7, 14), c(0,7,14), max(28 - ah, 0)) # Lags (in days) for features
mob[[a]] <- get_predictions(
forecaster = quantgen_forecaster,
name_of_forecaster = "mob",
signals = tibble::tibble(
data_source = data_source,
signal = data_signals,
start_day = list(start_day_mob)),
forecast_dates = forecast_dates,
as_of_override = function(x) lubridate::ymd(Sys.Date()),
incidence_period = incidence_period,
ahead = ah,
geo_type = "state",
signal_aggregation = "list",
geo_values = "*",
apply_corrections = corrector,
featurize = dav_maker_mob,
n = n,
lags = lags, # optionally use a list for different lags by
lambda = 0, # Just do quantile regression
sort = sort,
nonneg = nonneg)
}
mob <- bind_rows(mob)
nomob <- get_predictions(
forecaster = quantgen_forecaster,
name_of_forecaster = "no-mob",
signals = tibble::tibble(
data_source = data_source[1:2],
signal = data_signals[1:2],
start_day = list(start_day_nomob)),
forecast_dates = forecast_dates,
as_of_override = function(x) lubridate::ymd(Sys.Date()),
incidence_period = incidence_period,
ahead = ahead,
geo_type = "state",
signal_aggregation = "list",
geo_values = "*",
apply_corrections = corrector,
featurize = dav_maker,
n = n,
lags = lags[1:2], # optionally use a list for different lags by
lambda = 0, # Just do quantile regression
sort = sort,
nonneg = nonneg)
library(tidyr)
library(purrr)
library(ggplot2)
theme_set(theme_bw())
competition <- c("COVIDhub-ensemble","COVIDhub-baseline",
"CMU-TimeSeries", "Karlen-pypm")
submitted <- lapply(competition[1:3], get_covidhub_predictions,
forecast_dates = forecast_dates,
signal = "deaths_incidence_num")
submitted[[4]] <- get_covidhub_predictions("Karlen-pypm",
forecast_dates = forecast_dates - 1,
signal = "deaths_incidence_num") %>%
mutate(forecast_date = forecast_date + 1)
submitted <- bind_rows(submitted) %>% filter(ahead < 5)
# Some fixes to make comparable
qar_dc <- bind_rows(mob, nomob) %>%
mutate(quantile = as.numeric(quantile),
incidence_period = "epiweek",
ahead = ahead %/% 7 + 1)
results <- evaluate_predictions(bind_rows(qar_dc, submitted),
backfill_buffer = 0,
geo_type = "state") %>%
intersect_averagers(c("forecaster"), c("forecast_date", "geo_value"))
We compare the new forecaster to
NOTE: Results are based on the following numbers of common locations
results %>% group_by(forecast_date) %>% summarise(n_distinct(geo_value))
## # A tibble: 6 x 2
## forecast_date `n_distinct(geo_value)`
## <date> <int>
## 1 2020-08-31 51
## 2 2020-09-28 50
## 3 2020-10-26 51
## 4 2020-11-23 52
## 5 2020-12-21 52
## 6 2021-01-18 52
Top line conclusions:
subtitle = sprintf("Forecasts made over %s to %s",
format(min(forecast_dates), "%B %d"),
format(max(forecast_dates), "%B %d"))
plot_canonical(results, x = "ahead", y = "ae", aggr = mean) +
labs(subtitle = subtitle, xlab = "Weeks ahead", ylab = "Mean AE") +
theme(legend.position = "bottom") +
scale_y_log10()
plot_canonical(results, x = "ahead", y = "wis", aggr = mean) +
labs(subtitle = subtitle, xlab = "Weeks ahead", ylab = "Mean WIS") +
theme(legend.position = "bottom") +
scale_y_log10()
plot_canonical(results, x = "ahead", y = "coverage_80", aggr = mean) +
labs(subtitle = subtitle, xlab = "Weeks ahead", ylab = "Mean Coverage") +
theme(legend.position = "bottom") +
coord_cartesian(ylim=c(0,1)) + geom_hline(yintercept = .8, color="black")
Top line conclusions:
theme_set(theme_bw())
plot_canonical(results, x = "forecast_date", y = "ae", aggr = mean,
grp_vars = c("forecaster","ahead"), facet_rows = "ahead") +
labs(subtitle = subtitle, xlab = "forecast date", ylab = "Mean AE") +
theme(legend.position = "bottom") +
scale_y_log10()
plot_canonical(results, x = "forecast_date", y = "wis", aggr = mean,
grp_vars = c("forecaster","ahead"), facet_rows = "ahead") +
labs(subtitle = subtitle, xlab = "forecast date", ylab = "Mean WIS") +
theme(legend.position = "bottom") +
scale_y_log10()
plot_canonical(results, x = "forecast_date", y = "coverage_80", aggr = mean,
grp_vars = c("forecaster","ahead"), facet_rows = "ahead") +
labs(subtitle = subtitle, xlab = "forecast date", ylab = "Mean Coverage") +
theme(legend.position = "bottom") +
coord_cartesian(ylim=c(0,1)) + geom_hline(yintercept = .8, color="black")
Relative to baseline; scale first then take the median.
plot_canonical(results, x = "ahead", y = "wis", aggr = median,
base_forecaster = "COVIDhub-baseline", scale_before_aggr = TRUE) +
labs(subtitle = subtitle, xlab = "Weeks ahead", ylab = "Median relative WIS") +
theme(legend.position = "bottom") +
geom_hline(yintercept = 1)
plot_canonical(results, x = "forecast_date", y = "wis", aggr = median,
grp_vars = c("forecaster", "ahead"), facet_rows = "ahead",
base_forecaster = "COVIDhub-baseline", scale_before_aggr = TRUE) +
labs(subtitle = subtitle, xlab = "Forecast date", ylab = "Median relative WIS") +
theme(legend.position = "bottom") +
geom_hline(yintercept = 1)
Relative to baseline; scale first then take the geometric mean, ignoring a few 0’s. I think this is potentially more useful than the median/mean for relative WIS (or relative AE), but I haven’t completely thought it through. Putting the results here to be provocative.
geom_mean <- function(x) prod(x)^(1/length(x))
plot_canonical(results %>% filter(wis > 0), x = "ahead", y = "wis",
aggr = geom_mean,
base_forecaster = "COVIDhub-baseline", scale_before_aggr = TRUE) +
labs(subtitle = subtitle,
xlab = "Weeks ahead", ylab = "Mean (geometric) relative WIS") +
theme(legend.position = "bottom") +
geom_hline(yintercept = 1)
plot_canonical(results %>% filter(wis > 0), x = "forecast_date", y = "wis",
aggr = geom_mean, facet_rows = "ahead",
grp_vars = c("forecaster", "ahead"),
base_forecaster = "COVIDhub-baseline", scale_before_aggr = TRUE) +
theme(legend.position = "bottom") +
labs(subtitle = subtitle,
xlab = "Forecast date", ylab = "Mean (geometric) relative WIS") +
geom_hline(yintercept = 1)
plot_canonical(results, x = "target_end_date", y = "wis", aggr = mean,
dots = TRUE, grp_vars = "forecaster") +
labs(subtitle = subtitle, xlab = "Target date", ylab = "Mean WIS") +
theme(legend.position = "bottom") +
scale_y_log10()
plot_canonical(results, x = "target_end_date", y = "wis", aggr = mean,
dots = TRUE, grp_vars = c("forecaster", "ahead"),
facet_rows = "ahead") +
labs(subtitle = subtitle, xlab = "Target date", ylab = "Mean WIS") +
theme(legend.position = "bottom") +
scale_y_log10()
maps <- results %>%
group_by(geo_value, forecaster) %>%
summarise(across(wis:ae, mean)) %>%
pivot_longer(wis:ae, names_to = "score") %>%
group_by(score) %>%
mutate(time_value = Sys.Date(),
r = max(value)) %>%
group_by(forecaster, .add = TRUE) %>%
group_split()
maps <- purrr::map(maps, ~as.covidcast_signal(
.x, signal = .x$score[1], data_source = .x$forecaster[1], geo_type = "state"))
maps <- purrr::map(maps,
~plot(.x, choro_col = scales::viridis_pal()(3),
range = c(0,.x$r[1])))
nfcasts <- length(unique(results$forecaster))
cowplot::plot_grid(plotlist = maps[1:nfcasts], ncol = 3)
cowplot::plot_grid(plotlist = maps[(nfcasts+1):length(maps)], ncol = 3)
pd <- evalcast:::setup_plot_trajectory(
bind_rows(qar_dc, submitted %>% filter(forecaster == "CMU-TimeSeries")),
intervals = 0.8,
geo_type = "state",
start_day = min(forecast_dates) - 60)
g <- ggplot(pd$truth_df, mapping = aes(x = target_end_date))
# build the fan
g <- g + geom_ribbon(
data = pd$quantiles_df,
mapping = aes(ymin = lower, ymax = upper, fill = forecaster,
group = interaction(forecaster,forecast_date)),
alpha = .1) +
scale_fill_viridis_d(begin=.15, end=.85)
# line layer
g <- g +
#geom_line(aes(y = .data$value.y), color = "#3182BD") + # corrected
geom_line(aes(y = value)) + # reported
geom_line(data = pd$points_df,
mapping = aes(y = value, color = forecaster,
group = interaction(forecaster,forecast_date)),
size = 1) +
geom_point(aes(y = value)) + # reported gets dots
geom_point(data = pd$points_df,
mapping = aes(y = value, color = forecaster),
size = 3) +
scale_color_viridis_d(begin=.15, end=.85)
g + theme_bw(base_size = 20) +
facet_wrap(~geo_value, scales = "free_y", ncol = 5) +
theme(legend.position = "top") + ylab("") + xlab("")